#Load in the needed packages
library(ggplot2)
library(reshape2)
library(dplyr)
library(readr)
library(plotly)
library(tidyverse)
library(ggvis)
library(viridis)
library(RColorBrewer)
library(stats)
library(reshape)
# Load all the csv files.
# Every file lives in the same directory, so the directory is written once;
# point `data_dir` elsewhere to relocate the whole dataset.
data_dir <- "~/working/data_science_folders/data_munging_EDA/group_project_2_munging/world-development-indicators"

Country <- read_csv(file.path(data_dir, "Country.csv"))
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   LatestIndustrialData = col_double(),
##   LatestTradeData = col_double(),
##   LatestWaterWithdrawalData = col_double()
## )
## See spec(...) for full column specifications.
CountryNotes <- read_csv(file.path(data_dir, "CountryNotes.csv"))
## Parsed with column specification:
## cols(
##   Countrycode = col_character(),
##   Seriescode = col_character(),
##   Description = col_character()
## )
Footnotes <- read_csv(file.path(data_dir, "Footnotes.csv"))
## Parsed with column specification:
## cols(
##   Countrycode = col_character(),
##   Seriescode = col_character(),
##   Year = col_character(),
##   Description = col_character()
## )
Indicators <- read_csv(file.path(data_dir, "Indicators.csv"))
## Parsed with column specification:
## cols(
##   CountryName = col_character(),
##   CountryCode = col_character(),
##   IndicatorName = col_character(),
##   IndicatorCode = col_character(),
##   Year = col_double(),
##   Value = col_double()
## )
# Series.csv is the only file read with a comment character.
Series <- read_csv(file.path(data_dir, "Series.csv"), comment = "#")
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   OtherNotes = col_logical(),
##   OtherWebLinks = col_logical(),
##   RelatedIndicators = col_logical()
## )
## See spec(...) for full column specifications.
SeriesNotes <- read_csv(file.path(data_dir, "SeriesNotes.csv"))
## Parsed with column specification:
## cols(
##   Seriescode = col_character(),
##   Year = col_character(),
##   Description = col_character()
## )

# Now that all our data is loaded, the next step is to list all of the different indicators present in this dataset.

# Make the country name a factor
Indicators$CountryName <- as.factor(Indicators$CountryName)

# Coverage of every indicator: how many distinct countries and years report
# it, and the first and last year in which it appears.
counts <- Indicators %>%
  group_by(IndicatorCode, IndicatorName) %>%
  summarise(
    NumCountries = n_distinct(CountryName),
    NumYears     = n_distinct(Year),
    FirstYear    = min(Year),
    LastYear     = max(Year)
  ) %>%
  # summarise() only peels off the last grouping variable; drop the rest so
  # `counts` is a plain, ungrouped table.
  ungroup()

# Replace the literal "$" in indicator names with the word "dollar".
counts$IndicatorName <- gsub("$", "dollar", counts$IndicatorName, fixed = TRUE)

# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(counts)
}
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE):
## running command ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/
## Resources/modules/R_de.so'' had status 1
# Create BRICS indicator: the country codes of the five BRICS members and the
# rows of Indicators that belong to them.
brics <- c("BRA", "RUS", "IND", "CHN", "ZAF")
brics.Indicators <- subset(Indicators, CountryCode %in% brics)

# Line chart of Value against Year with one coloured line per CountryCode.
#   data    - rows of brics.Indicators for a single indicator
#   title   - plot title
#   y_label - y-axis label
# Returns the ggplot object, which is printed in the same situations as a bare
# top-level ggplot() call.
# theme_classic() is a complete theme and replaces any theme added before it,
# so the base size and font family are passed to it directly rather than to a
# preceding theme_bw() whose settings would be thrown away.
plot_brics_trend <- function(data, title, y_label) {
  ggplot(data = data, aes(Year, Value)) +
    geom_line(aes(color = CountryCode), size = 0.85) +
    scale_x_continuous(breaks = seq(1960, 2014, 5)) +
    theme_classic(base_size = 12, base_family = "Helvetica") +
    labs(title = title, x = "Year", y = y_label)
}

# Create the GDP growth (annual %)
GDP_growth_annual_percent <- subset(
  brics.Indicators,
  IndicatorCode == "NY.GDP.MKTP.KD.ZG"
)

# Plot the variable
plot_brics_trend(
  GDP_growth_annual_percent,
  title = "GDP growth increased for most BRICS, especially China",
  y_label = "Annual Percent Growth"
)

# Create the GDP per capita variable current US dollar
GDP_per_capita_current_US <- subset(
  brics.Indicators,
  IndicatorCode == "NY.GDP.PCAP.CD"
)

# Plot the variable
plot_brics_trend(
  GDP_per_capita_current_US,
  title = "GDP per capita has risen sharply for most countries since the early 2000s",
  y_label = "Current US Dollar"
)

# Create the GDP per capita, PPP (current international $)
GDP_per_capita_PPP_international <- subset(
  brics.Indicators,
  IndicatorCode == "NY.GDP.PCAP.PP.CD"
)

# Plot the variable
plot_brics_trend(
  GDP_per_capita_PPP_international,
  title = "GDP per capita, converted through PPP shows Russia is ahead",
  y_label = "Current International Dollar"
)

# Check for adjusted net national income
adjusted_net_national_income <- subset(
  brics.Indicators,
  IndicatorCode == "NY.ADJ.NNTY.CD"
)

# Plot the variable
plot_brics_trend(
  adjusted_net_national_income,
  title = "Adjusted Net National Income has risen sharply for China",
  y_label = "Current US dollar"
)

# Poverty indicators for the BRICS countries.
#
# Each of the four poverty indicators gets the same three charts: a scatter of
# the indicator against GDP per capita for a single year, a boxplot, and a
# violin plot of every available value per country. The charts are built by
# the helpers below; every call is made at top level (not in a loop) so the
# returned plots are printed like bare ggplot() calls.

# Scatter of one poverty indicator against GDP per capita (NY.GDP.PCAP.CD)
# for a single year.
#   indicators    - long-format table with CountryName, CountryCode,
#                   IndicatorCode, Year and Value columns
#   poverty_code  - IndicatorCode of the poverty series
#   poverty_label - x-axis label
#   title         - plot title
#   x_max         - upper x-axis limit (the lower limit is 0)
#   year          - year to compare; also shown in the caption
# Every country with a GDP row in `year` is kept; its poverty value is NA when
# the poverty series has no row for that country and year.
plot_poverty_vs_gdp <- function(indicators, poverty_code, poverty_label,
                                title, x_max, year = 1993) {
  gdp <- indicators %>%
    dplyr::filter(IndicatorCode == "NY.GDP.PCAP.CD", Year == year) %>%
    dplyr::select(CountryName, CountryCode, Year, NY.GDP.PCAP.CD = Value)

  poverty <- indicators %>%
    dplyr::filter(IndicatorCode == poverty_code, Year == year) %>%
    dplyr::select(CountryCode, poverty_value = Value)

  left_join(gdp, poverty, by = "CountryCode") %>%
    ggplot(aes(x = poverty_value,
               y = NY.GDP.PCAP.CD,
               color = CountryName)) +
    geom_point(size = 3) +
    labs(y = "GDP per capita",
         x = poverty_label,
         title = title,
         caption = paste("Data from", year)) +
    xlim(0, x_max) +
    theme_classic()
}

# Boxplot of Value per country for a single-indicator subset.
plot_poverty_boxplot <- function(data) {
  ggplot(data = data, mapping = aes(x = CountryName, y = Value)) +
    geom_boxplot()
}

# Violin plot of Value per country for a single-indicator subset;
# `y_label` is the y-axis label.
plot_poverty_violin <- function(data, y_label) {
  ggplot(data, aes(x = CountryName, y = Value, fill = CountryName)) +
    geom_violin() +
    scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 11)
    ) +
    ggtitle("Violin chart showing how BRICS countries vary across poverty values") +
    xlab("BRICS Countries") +
    ylab(y_label)
}

# Check for Poverty gap at $1.90 a day (2011 PPP) (%)
plot_poverty_vs_gdp(
  brics.Indicators,
  poverty_code = "SI.POV.GAPS",
  poverty_label = "Poverty gap at $1.90 a day (2011 PPP) (%)",
  title = "Higher per capita GDP is associated with a smaller poverty gap",
  x_max = 50
)

poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.GAPS")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(poverty_gap, "Poverty gap at $1.90 a day (2011 PPP) (%)")

# Check for Poverty gap at $3.10 a day (2011 PPP) (%)
plot_poverty_vs_gdp(
  brics.Indicators,
  poverty_code = "SI.POV.GAP2",
  poverty_label = "Poverty gap at $3.10 a day (2011 PPP) (%)",
  title = "Higher per capita GDP is associated with a smaller poverty gap",
  x_max = 50
)

poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.GAP2")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(poverty_gap, "Poverty gap at $3.10 a day (2011 PPP) (%)")

# Check for Poverty headcount ratio at $1.90 a day
plot_poverty_vs_gdp(
  brics.Indicators,
  poverty_code = "SI.POV.DDAY",
  poverty_label = "Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)",
  title = "Higher per capita GDP is associated with a smaller poverty headcount",
  x_max = 100
)

poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.DDAY")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(
  poverty_gap,
  "Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)"
)

# Check for Poverty headcount ratio at $3.10 a day
plot_poverty_vs_gdp(
  brics.Indicators,
  poverty_code = "SI.POV.2DAY",
  poverty_label = "Poverty headcount ratio at $3.10 a day (2011 PPP) (% of population)",
  title = "Higher per capita GDP is associated with a smaller poverty headcount",
  x_max = 100
)

poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.2DAY")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(
  poverty_gap,
  "Poverty headcount ratio at $3.10 a day (2011 PPP) (% of population)"
)

# Agriculture indicators for the BRICS countries, drawn as paths through
# (x indicator, y indicator) space.

# Put two indicators side by side, one row per country and year.
#   indicators - long-format table with CountryName, CountryCode,
#                IndicatorCode, Year and Value columns
#   base_code  - indicator whose rows define the result (all of them are kept)
#   other_code - indicator attached by CountryCode and Year; NA where it has
#                no matching row
# The two value columns are named after their indicator codes.
join_indicator_pair <- function(indicators, base_code, other_code) {
  base <- indicators %>%
    dplyr::filter(IndicatorCode == base_code) %>%
    dplyr::select(CountryName, CountryCode, Year, Value)
  other <- indicators %>%
    dplyr::filter(IndicatorCode == other_code) %>%
    dplyr::select(CountryCode, Year, Value)

  names(base)[names(base) == "Value"] <- base_code
  names(other)[names(other) == "Value"] <- other_code

  left_join(base, other, by = c("CountryCode", "Year"))
}

# For each country, the row of the earliest year in which both `x_code` and
# `y_code` are present. Deriving it from the data keeps the "first year" dots
# correct whatever years each series happens to cover.
first_complete_year <- function(data, x_code, y_code) {
  complete <- data[!is.na(data[[x_code]]) & !is.na(data[[y_code]]), ]
  complete %>%
    group_by(CountryCode) %>%
    dplyr::filter(Year == min(Year)) %>%
    ungroup()
}

# Check out Agricultural land (% of land area) against GDP per capita
agriculture_percent <- join_indicator_pair(
  brics.Indicators,
  base_code = "NY.GDP.PCAP.CD",
  other_code = "AG.LND.AGRI.ZS"
)

start <- first_complete_year(
  agriculture_percent, "AG.LND.AGRI.ZS", "NY.GDP.PCAP.CD"
)

ggplot(data = agriculture_percent,
       aes(x = AG.LND.AGRI.ZS,
           y = NY.GDP.PCAP.CD,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start,
             aes(x = AG.LND.AGRI.ZS,
                 y = NY.GDP.PCAP.CD)) +
  xlim(0, 100) +
  labs(y = "GDP per capita",
       x = "Agricultural land (% of land area)",
       title = "Changes in agricultural holdings seem unrelated to changes in GDP per capita",
       caption = "Dot indicates first year of data.") +
  theme_classic()
## Warning: Removed 12 rows containing missing values (geom_path).

# Check out Agricultural land (% of land area) against population growth
agriculture_population <- join_indicator_pair(
  brics.Indicators,
  base_code = "SP.POP.GROW",
  other_code = "AG.LND.AGRI.ZS"
)

start <- first_complete_year(
  agriculture_population, "AG.LND.AGRI.ZS", "SP.POP.GROW"
)

ggplot(data = agriculture_population,
       aes(x = AG.LND.AGRI.ZS,
           y = SP.POP.GROW,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start,
             aes(x = AG.LND.AGRI.ZS,
                 y = SP.POP.GROW)) +
  xlim(0, 100) +
  labs(y = "Population growth (annual %)",
       x = "Agricultural land (% of land area)",
       title = "No discernible relationship between population growth and agricultural land over time",
       caption = "Dot indicates first year of data.") +
  theme_classic()
## Warning: Removed 41 rows containing missing values (geom_path).

# Check the Agriculture, value added (% of GDP) against GDP per capita
agrilcuture_value <- join_indicator_pair(
  brics.Indicators,
  base_code = "NY.GDP.PCAP.CD",
  other_code = "NV.AGR.TOTL.ZS"
)

start <- first_complete_year(
  agrilcuture_value, "NV.AGR.TOTL.ZS", "NY.GDP.PCAP.CD"
)

# One panel per country instead of one colour per country.
ggplot(data = agrilcuture_value,
       aes(x = NV.AGR.TOTL.ZS,
           y = NY.GDP.PCAP.CD)) +
  geom_path() +
  geom_point(data = start,
             aes(x = NV.AGR.TOTL.ZS,
                 y = NY.GDP.PCAP.CD)) +
  facet_grid(. ~ CountryName) +
  xlim(0, 50) +
  labs(y = "GDP per capita",
       x = "Agriculture, value added (% of GDP)",
       title = "Overall contribution from agriculture doesn't seem to impact increases in GDP",
       caption = "Dot indicates first year of data.") +
  theme_classic()

library(plotly)

# One row per country code present in Indicators, sorted by code.
all_countries <- Indicators %>%
  distinct(CountryCode) %>%
  arrange(CountryCode)

# Check the Agriculture, value added (% of GDP)
# GDP per capita rows for the BRICS countries, with the agriculture share
# attached by country and year.
gdp_per_capita <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "NY.GDP.PCAP.CD") %>%
  dplyr::select(CountryName, CountryCode, Year, NY.GDP.PCAP.CD = Value)

agriculture_share <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "NV.AGR.TOTL.ZS") %>%
  dplyr::select(CountryCode, Year, NV.AGR.TOTL.ZS = Value)

# A choropleth shows a single value per country, so keep only the most recent
# year in which each country has an agriculture share. Without this step the
# map would be given one row per country and year, and the zero-fill below
# would also turn BRICS years without data into 0.
# NOTE(review): how plotly resolves repeated locations is not verified here;
# confirm that one row per country is the intended input.
agrilcuture_value <- left_join(gdp_per_capita, agriculture_share,
                               by = c("CountryCode", "Year")) %>%
  dplyr::filter(!is.na(NV.AGR.TOTL.ZS)) %>%
  group_by(CountryCode) %>%
  dplyr::filter(Year == max(Year)) %>%
  ungroup()

# Add every other country so it is drawn too; those rows have no data and are
# given an agriculture share of 0.
agrilcuture_value <- left_join(all_countries, agrilcuture_value, by = "CountryCode")

agrilcuture_value$NV.AGR.TOTL.ZS <- replace_na(agrilcuture_value$NV.AGR.TOTL.ZS, 0)

# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)

# specify map projection/options
# NOTE(review): 'Albers' may not be a projection name plotly recognises;
# check it against the plotly layout.geo.projection.type reference.
g <- list(
  showframe = FALSE,
  showcoastlines = FALSE,
  projection = list(type = 'Albers')
)

p <- plot_geo(agrilcuture_value) %>%
  add_trace(
    z = ~NV.AGR.TOTL.ZS,
    color = ~NV.AGR.TOTL.ZS,
    colors = 'Blues',
    text = ~CountryName,
    locations = ~CountryCode,
    marker = list(line = l)
  ) %>%
  colorbar(title = 'Agriculture, value added (% of GDP)') %>%
  layout(
    title = '',
    geo = g
  )
p
# Under-5 mortality (SH.DYN.MORT) for the BRICS countries.
mortality_rate_under_5 <- subset(brics.Indicators, IndicatorCode == "SH.DYN.MORT")

# Trend over time, one line per country. theme_classic() is a complete theme
# that replaces any theme added before it, so the font settings are passed to
# it directly instead of to a preceding theme_bw().
ggplot(data = mortality_rate_under_5, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic(base_size = 12, base_family = "Helvetica") +
  ggtitle("Mortality rate for BRICS countries steadily decreases") +
  ylab("Mortality rate, under-5")

# subsetting for BRICS countries
# From here on `brics` holds the BRICS rows of Indicators (selected by country
# name). Columns are referenced bare inside filter() rather than through
# `Indicators$`, so the condition always refers to the piped data.
brics <- Indicators %>%
  filter(CountryName %in% c("Brazil", "India", "South Africa",
                            "Russian Federation", "China"))

#View(brics)

# Distribution of the yearly values per country.
ggplot(data = mortality_rate_under_5, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

mortality_rate_under_5 %>%
  ggplot(aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Distribution of mortality rates highlights differences in each countries path to lower infant mortality") +
  xlab("BRICS Countries")

# Check out Mortality rate, under-5 against population growth.
# One row per country and year: every under-5 mortality row, with population
# growth attached (NA where there is no matching growth row).
mortality <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SH.DYN.MORT") %>%
  dplyr::select(CountryName, CountryCode, Year, SH.DYN.MORT = Value)

growth <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SP.POP.GROW") %>%
  dplyr::select(CountryCode, Year, SP.POP.GROW = Value)

population_mortality <- left_join(mortality, growth,
                                  by = c("CountryCode", "Year"))

# First year in which a country has both values. It is derived from the data
# so the dots stay correct whatever years each series covers.
start <- population_mortality %>%
  dplyr::filter(!is.na(SH.DYN.MORT), !is.na(SP.POP.GROW)) %>%
  group_by(CountryCode) %>%
  dplyr::filter(Year == min(Year)) %>%
  ungroup()

ggplot(data = population_mortality,
       aes(x = SH.DYN.MORT,
           y = SP.POP.GROW,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start,
             aes(x = SH.DYN.MORT,
                 y = SP.POP.GROW)) +
  labs(y = "Population growth (annual %)",
       x = "Mortality rate, under-5",
       title = "Mortality rate decrease also shows a decrease in the population growth as well",
       caption = "Dot indicates first year of data.") +
  theme_classic()
## Warning: Removed 5 rows containing missing values (geom_path).

# Total unemployment (SL.UEM.TOTL.ZS) for the BRICS countries.
unemployment_rate <- subset(brics.Indicators, IndicatorCode == "SL.UEM.TOTL.ZS")

# Trend over time, one line per country. theme_classic() is a complete theme
# that replaces any theme added before it, so the font settings are passed to
# it directly instead of to a preceding theme_bw().
ggplot(data = unemployment_rate, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic(base_size = 12, base_family = "Helvetica") +
  ggtitle("Unemployment remains low for the most part other than for South Africa") +
  ylab("Unemployment,total (% of total labor force) (modeled ILO estimate)")

# Distribution of the yearly values per country.
ggplot(data = unemployment_rate, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

unemployment_rate %>%
  ggplot(aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Unemployment,total (% of total labor force) (modeled ILO estimate)") +
  xlab("BRICS Countries")

# Check out education against unemployment.
# One row per country and year: every education-expenditure row, with the
# unemployment rate attached (NA where there is no matching unemployment row).
education <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SE.XPD.TOTL.GD.ZS") %>%
  dplyr::select(CountryName, CountryCode, Year, SE.XPD.TOTL.GD.ZS = Value)

unemployment <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SL.UEM.TOTL.ZS") %>%
  dplyr::select(CountryCode, Year, SL.UEM.TOTL.ZS = Value)

unemployment_education <- left_join(education, unemployment,
                                    by = c("CountryCode", "Year"))

# First year in which a country has both values. It is derived from the data
# so the dots stay correct whatever years each series covers.
start <- unemployment_education %>%
  dplyr::filter(!is.na(SL.UEM.TOTL.ZS), !is.na(SE.XPD.TOTL.GD.ZS)) %>%
  group_by(CountryCode) %>%
  dplyr::filter(Year == min(Year)) %>%
  ungroup()

# Unemployment is mapped to x and education expenditure to y, and the axis
# labels follow that mapping. The title is left blank.
ggplot(data = unemployment_education,
       aes(x = SL.UEM.TOTL.ZS,
           y = SE.XPD.TOTL.GD.ZS,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start,
             aes(x = SL.UEM.TOTL.ZS,
                 y = SE.XPD.TOTL.GD.ZS)) +
  labs(x = "Unemployment,total (% of total labor force) (modeled ILO estimate)",
       y = "Government expenditure on education, total (% of GDP)",
       title = "",
       caption = "Dot indicates first year of data.") +
  theme_classic()
## Warning: Removed 22 rows containing missing values (geom_path).

# Annual population growth (SP.POP.GROW) for the BRICS countries.
population_growth_rate <- subset(brics.Indicators, IndicatorCode == "SP.POP.GROW")

# Trend over time, one line per country. theme_classic() is a complete theme
# that replaces any theme added before it, so the font settings are passed to
# it directly instead of to a preceding theme_bw().
ggplot(data = population_growth_rate, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic(base_size = 12, base_family = "Helvetica") +
  ggtitle("All BRICS countries steadily decrease their annual growth rates") +
  ylab("Population growth (annual %)")

# Distribution of the yearly values per country.
ggplot(data = population_growth_rate, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

population_growth_rate %>%
  ggplot(aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Population growth (annual %)") +
  xlab("BRICS Countries")

# Government expenditure on education (SE.XPD.TOTL.GD.ZS) for the BRICS
# countries.
govt_exp_edu <- subset(brics.Indicators, IndicatorCode == "SE.XPD.TOTL.GD.ZS")

# Trend over time, one line per country. theme_classic() is a complete theme
# that replaces any theme added before it, so the font settings are passed to
# it directly instead of to a preceding theme_bw().
ggplot(data = govt_exp_edu, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic(base_size = 12, base_family = "Helvetica") +
  ggtitle("Education spending differs substantially across countries and over time") +
  ylab("Government expenditure on education, total (% of GDP)")

# Distribution of the yearly values per country.
ggplot(data = govt_exp_edu, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

# The title names the series that is actually plotted (education expenditure).
govt_exp_edu %>%
  ggplot(aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Government expenditure on education, total (% of GDP)") +
  xlab("BRICS Countries")

# BRICS rows of Indicators, selected by country name. Columns are referenced
# bare inside filter() rather than through `Indicators$` / `brics$`, so each
# condition always refers to the data being piped in.
brics <- Indicators %>%
  filter(CountryName %in% c("Brazil", "India", "South Africa",
                            "Russian Federation", "China"))

# Filter four different expenditures as % of GDP
expenditure_indicators <- c(
  "Military expenditure (% of GDP)",
  "Research and development expenditure (% of GDP)",
  "Health expenditure, total (% of GDP)",
  "Government expenditure on education as % of GDP (%)"
)
expenditures <- brics %>%
  filter(IndicatorName %in% expenditure_indicators)

#View(expenditures)
# Stacked bars per country, filled by indicator; country names are wrapped at
# 10 characters for the axis.
# NOTE(review): no Year filter is applied, so each bar stacks every available
# yearly value; confirm that a total over years (rather than a single year or
# an average) is what this chart is meant to show.
ggplot(data = expenditures,
       aes(x = stringr::str_wrap(CountryName, 10),
           y = Value,
           fill = IndicatorName)) +
  geom_col() +
  xlab("BRICS Countries")

# Country codes of the BRICS members.
BRICS <- c("BRA", "CHN", "IND", "RUS", "ZAF")

# Single-indicator subsets of Indicators (all countries), years after 1991.
military_exports <- subset(Indicators, IndicatorCode == "MS.MIL.XPRT.KD" & Year > 1991)
military_imports <- subset(Indicators, IndicatorCode == "MS.MIL.MPRT.KD" & Year > 1991)
total_reserves <- subset(Indicators, IndicatorCode == "FI.RES.TOTL.CD" & Year > 1991)
total_reserves_without_gold <- subset(Indicators, IndicatorCode == "FI.RES.XGLD.CD" & Year > 1991)
brics_uem_indicators <- subset(Indicators, IndicatorCode == "SL.UEM.TOTL.ZS" & Year > 1991)

# Line chart of Value against Year, one coloured line per country name, using
# the text and legend styling shared by the four charts below.
#   data      - single-indicator subset of Indicators
#   y_label   - y-axis label
#   title     - plot title
#   countries - country codes to keep (defaults to BRICS)
# Returns the ggplot object.
plot_brics_series <- function(data, y_label, title, countries = BRICS) {
  ggplot(data = subset(data, CountryCode %in% countries),
         aes(x = Year, y = Value, col = CountryName)) +
    geom_line(size = 1) +
    ylab(y_label) +
    ggtitle(title) +
    theme(
      text = element_text(colour = "black", face = "bold", size = 12),
      legend.title = element_text(colour = "blue", size = 16, face = "bold"),
      legend.text = element_text(colour = "black", size = 10, face = "bold",
                                 family = "Times New Roman"),
      legend.background = element_rect(fill = "gray90", size = .5,
                                       linetype = "dotted")
    )
}

# VALUE OF ARMS EXPORTS OF BRICS COUNTRIES
plot1 <- plot_brics_series(
  military_exports,
  y_label = "Exports",
  title = "ARMS EXPORTS OF BRICS COUNTRIES OVER THE YEARS"
)
plot1

# VALUE OF ARMS IMPORTS OF BRICS COUNTRIES
plot2 <- plot_brics_series(
  military_imports,
  y_label = "Imports",
  title = "ARMS IMPORTS OF BRICS COUNTRIES OVER THE YEARS"
)
plot2

# TOTAL RESERVES OF BRICS COUNTRIES
plot3 <- plot_brics_series(
  total_reserves,
  y_label = "Total Reserves",
  title = "TOTAL RESERVES OF BRICS COUNTRIES OVER THE YEARS"
)
plot3

# TOTAL RESERVES MINUS GOLD OF BRICS COUNTRIES
plot4 <- plot_brics_series(
  total_reserves_without_gold,
  y_label = "Total Reserves",
  title = "TOTAL RESERVES MINUS GOLD OF BRICS COUNTRIES OVER THE YEARS"
)
plot4

# Wide table of seven indicators for the BRICS rows in `brics`: one row per
# country and year, one short-named column per indicator.
all_in_all <- brics %>%
  filter(IndicatorCode %in% c("SL.UEM.TOTL.ZS", "SH.DYN.MORT", "SP.POP.GROW",
                              "SE.XPD.TOTL.GD.ZS", "NY.GDP.PCAP.CD",
                              "SI.POV.GAPS", "AG.LND.AGRI.ZS")) %>%
  select(CountryName, IndicatorName, Year, Value) %>%
  pivot_wider(names_from = IndicatorName, values_from = Value) %>%
  # select() renames and fixes the column order in one step, so later code
  # does not depend on the order in which pivot_wider() creates the columns.
  select(
    CountryName,
    Year,
    agricul.land = `Agricultural land (% of land area)`,
    GDP.per.cap = `GDP per capita (current US$)`,
    gov.exp.edu = `Government expenditure on education as % of GDP (%)`,
    mort.rate = `Mortality rate, under-5 (per 1,000)`,
    pop.growth = `Population growth (annual %)`,
    pov.gap = `Poverty gap at $1.90 a day (2011 PPP) (%)`,
    unemployment = `Unemployment, total (% of total labor force)`
  ) %>%
  arrange(CountryName, Year)

#View(all_in_all)
# Indicator columns only, picked by name rather than by position.
indicator_columns <- c("agricul.land", "GDP.per.cap", "gov.exp.edu",
                       "mort.rate", "pop.growth", "pov.gap", "unemployment")
new <- all_in_all[, indicator_columns]

library(ggcorrplot)
# Correlations use only the rows in which all seven indicators are present.
corr <- cor(new, use = "complete.obs")
ggcorrplot(corr, method = "circle")